package xiaoa.java.spider;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.helper.HttpConnection;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

/**
 * Small Weibo scraper: downloads a user's profile page, reads the CSS class
 * name of the element that follows the first {@code .photo_wrap} node (used
 * here as the user's "verified" marker) and stores it on a {@link UserBean}.
 *
 * <p>{@link #main(String[])} wires three worker stages together through two
 * bounded blocking queues: add -> fill -> save.
 *
 * Created by xiaoa on 2017/2/27.
 */
public class WeiBoVerifiedSpider {

    /**
     * Downloads the profile page {@code http://weibo.com/u/<author_id>}.
     *
     * @param author_id Weibo user id appended to the profile URL
     * @return the response body as returned by jsoup
     * @throws Throwable any failure raised by jsoup while executing the request
     */
    public static String fetch(String author_id) throws Throwable {

        // Jsoup.connect is the public entry point; it delegates to the
        // internal HttpConnection.connect the original code called directly.
        Connection conn = Jsoup.connect("http://weibo.com/u/" + author_id);
        conn.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        conn.header("Accept-Encoding", "gzip, deflate, sdch");
        conn.header("Host", "weibo.com");
        conn.header("User-Agent", "360Spider");

        Connection.Response response = conn.execute();
        return response.body();
    }

    /**
     * Extracts the "verified" marker from a profile page.
     *
     * <p>The marker is the class name of the first child of the element that
     * immediately follows the first {@code .photo_wrap} element.
     *
     * @param html profile page markup; must be non-null and non-empty
     * @return the class name described above, or {@code null} when the page
     *         has no {@code .photo_wrap} element, that element has no next
     *         sibling, or the sibling has no child elements
     * @throws IllegalArgumentException if {@code html} is null or empty
     * @throws Throwable declared for backward compatibility with callers
     */
    public static String parse(String html) throws Throwable {

        if (html == null || html.equals("")) {
            // IllegalArgumentException is a RuntimeException, so existing
            // callers that catch RuntimeException keep working.
            throw new IllegalArgumentException("参数有误");
        }

        Document document = Jsoup.parse(html);

        // first() yields null instead of throwing when nothing matched,
        // e.g. when Weibo served a login or error page.
        Element photoWrap = document.select(".photo_wrap").first();
        if (photoWrap == null) {
            return null;
        }

        Element auth = photoWrap.nextElementSibling();
        if (auth == null || auth.children().isEmpty()) {
            return null;
        }

        return auth.child(0).className();
    }

    /**
     * Fetches the profile page for {@code bean.author_id} and stores the
     * parsed marker in {@code bean.verified}.
     *
     * <p>Does nothing when the bean or its id is null/empty. When the fetched
     * page is null or empty, a message is printed and {@code bean.verified}
     * is left untouched.
     *
     * @param bean the user to fill in; may be {@code null}
     * @throws Throwable any failure from {@link #fetch(String)} or {@link #parse(String)}
     */
    public static void fillVerified(UserBean bean) throws Throwable {

        if (bean == null || bean.author_id == null || bean.author_id.equals("")) {
            return;
        }

        // Download the page.
        String html = fetch(bean.author_id);

        if (html == null || html.equals("")) {
            System.out.println("=========== author_id  = " + bean.author_id + "   fetch NULL ");
            // Nothing to parse; parse() would reject this input anyway.
            return;
        }

        // Parse and store the result.
        bean.verified = parse(html);
    }


    /**
     * Plain holder for a user id and its "verified" marker.
     */
    public static class UserBean {

        // User id.
        public String author_id;

        // Verification info as extracted by parse().
        public String verified;

        public String getAuthor_id() {
            return author_id;
        }

        public void setAuthor_id(String author_id) {
            this.author_id = author_id;
        }

        public String getVerified() {
            return verified;
        }

        public void setVerified(String verified) {
            this.verified = verified;
        }
    }


    /**
     * Starts the add, save and fill worker threads connected by two bounded
     * queues. The save and fill workers loop until interrupted.
     *
     * @param args unused
     * @throws Throwable declared for backward compatibility
     */
    public static void main(String[] args) throws Throwable {

        // Beans waiting to have their "verified" field filled.
        final BlockingQueue<UserBean> waitFillQueue = new ArrayBlockingQueue<UserBean>(10000);

        // Filled beans waiting to be persisted.
        final BlockingQueue<UserBean> waitUpdateQueue = new ArrayBlockingQueue<UserBean>(10000);

        // Producer: feeds beans into the fill queue.
        Runnable addRun = new Runnable() {
            @Override
            public void run() {
                try {
                    while (true) {

                        // TODO: replace this sample bean with a real source of users.

                        UserBean test0 = new UserBean();
                        test0.author_id = "5871763776";

                        // put() blocks when the queue is full instead of throwing.
                        waitFillQueue.put(test0);

                        System.out.println("========= add  author_id = " + test0.author_id + " verified = " + test0.verified);

                        // Only the single sample bean is produced for now.
                        break;
                    }
                } catch (InterruptedException e) {
                    // Preserve the interrupt for whoever owns this thread.
                    Thread.currentThread().interrupt();
                } catch (Throwable e) {
                    e.printStackTrace();
                }
            }
        };

        // Consumer: persists filled beans.
        Runnable saveRun = new Runnable() {
            @Override
            public void run() {
                while (true) {
                    try {
                        // Take the next filled bean.
                        UserBean bean = waitUpdateQueue.take();

                        // TODO: implement the actual save.

                        System.out.println("========= save  author_id = " + bean.author_id + " verified = " + bean.verified);
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        return;
                    } catch (Throwable e) {
                        // A failure on one bean must not stop the worker.
                        e.printStackTrace();
                    }
                }
            }
        };

        // Worker: fetches and parses the page for each queued bean.
        Runnable fillRun = new Runnable() {
            @Override
            public void run() {
                while (true) {
                    try {
                        // Take the next bean to fill.
                        UserBean bean = waitFillQueue.take();

                        fillVerified(bean);

                        System.out.println("========= fill  author_id = " + bean.author_id + " verified = " + bean.verified);

                        // Hand over to the save stage; blocks when that queue is full.
                        waitUpdateQueue.put(bean);
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        return;
                    } catch (Throwable e) {
                        // The try sits inside the loop so that a single failed
                        // fetch/parse is logged and the worker moves on.
                        e.printStackTrace();
                    }
                }
            }
        };


        // Start the producer thread(s).
        for (int i = 0; i < 1; i++) {
            Thread thread = new Thread(addRun);
            thread.setName("ThreadName_add_" + i);
            thread.start();
        }

        // Start the save thread(s).
        for (int i = 0; i < 1; i++) {
            Thread thread = new Thread(saveRun);
            thread.setName("ThreadName_save_" + i);
            thread.start();
        }

        // Start the fill thread(s).
        for (int i = 0; i < 1; i++) {
            Thread thread = new Thread(fillRun);
            thread.setName("ThreadName_fill_" + i);
            thread.start();
        }

    }

}
